//: C17:HTMLStripper.cpp
// From Thinking in C++, 2nd Edition
// Available at http://www.BruceEckel.com
// (c) Bruce Eckel 1999
// Copyright notice in Copyright.txt
// Filter to remove html tags and markers
#include "../require.h"
#include <fstream>
#include <iostream>
#include <string>
using namespace std;

string replaceAll(string s, string f, string r)
{
    unsigned int found = s.find(f);
    while (found != string::npos) {
        s.replace(found, f.length(), r);
        found = s.find(f);
    }
    return s;
}

string stripHTMLTags(string s)
{
    while (true) {
        unsigned int left = s.find('<');
        unsigned int right = s.find('>');
        if (left==string::npos || right==string::npos)
            break;
        s = s.erase(left, right - left + 1);
    }
    s = replaceAll(s, "&lt;", "<");
    s = replaceAll(s, "&gt;", ">");
    s = replaceAll(s, "&amp;", "&");
    s = replaceAll(s, "&nbsp;", " ");
    // Etc...
    return s;
}

int main(int argc, char* argv[])
{
    requireArgs(argc, 1,
                "usage: HTMLStripper InputFile");
    ifstream in(argv[1]);
    assure(in, argv[1]);
    const int sz = 4096;
    char buf[sz];
    while (in.getline(buf, sz)) {
        string s(buf);
        cout << stripHTMLTags(s) << endl;
    }
} ///:~
